{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 25,
   "id": "initial_id",
   "metadata": {
    "collapsed": true,
    "ExecuteTime": {
     "end_time": "2023-10-17T07:35:26.699159500Z",
     "start_time": "2023-10-17T07:35:26.665885700Z"
    }
   },
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import re\n",
    "import openpyxl\n",
    "from warnings import filterwarnings\n",
    "from pandas import DataFrame\n",
    "\n",
    "filterwarnings('ignore')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "outputs": [],
   "source": [
    "class ZhDataClear:\n",
    "    \"\"\"\n",
    "    一个英文数据清洗的 工具类\n",
    "    \"\"\"\n",
    "\n",
    "    def __init__(self, data_frame):\n",
    "        \"\"\"\n",
    "        初始化 工具类\n",
    "        :param data_frame: 传入Dataframe对象\n",
    "        \"\"\"\n",
    "        # 删除重复行\n",
    "        data_frame = data_frame.drop_duplicates(keep='last')\n",
    "        # 去掉缺失值\n",
    "        data_frame = data_frame.dropna(subset=['职位', '任职要求'])\n",
    "        self.df = data_frame\n",
    "\n",
    "    @staticmethod\n",
    "    def convert_fullwidth_to_halfWidth(text):\n",
    "        '''\n",
    "        全角字符统一为半全角字符\n",
    "        '''\n",
    "        halfwidth_chars = \"1234567890abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ!\\\"#$%&'()*+,-./:;<=>?@[\\\\]^_`{|}~\"\n",
    "\n",
    "        # 对应的全角字符Unicode范围\n",
    "        fullwidth_chars = \"１２３４５６７８９０ａｂｃｄｅｆｇｈｉｊｋｌｍｎｏｐｑｒｓｔｕｖｗｘｙｚＡＢＣＤＥＦＧＨＩＪＫＬＭＮＯＰＱＲＳＴＵＶＷＸＹＺ！“＃＄％＆'（）＊＋，－．／：；＜＝＞？＠［＼］＾＿｀｛｜｝～\"\n",
    "\n",
    "        char_mapping = dict()\n",
    "        for i in range(len(halfwidth_chars)):\n",
    "            char_mapping[ord(fullwidth_chars[i])] = halfwidth_chars[i]\n",
    "        return text.translate(char_mapping)\n",
    "\n",
    "    @staticmethod\n",
    "    def replace_special_char(cleaned_text):\n",
    "        \"\"\"\n",
    "        去除特殊字符\n",
    "        :param cleaned_text: \n",
    "        :return: \n",
    "        \"\"\"        \n",
    "        # 使用正则表达式删除以 \"@\" 开头的邮箱地址\n",
    "        cleaned_text = re.sub(r'@\\w+\\.\\w+', ' ', cleaned_text)\n",
    "        # 去除URLs\n",
    "        cleaned_text = re.sub(r'https?://\\S+|www\\.\\S+', ' ', cleaned_text)\n",
    "        # 去除HTML标记\n",
    "        pattern = re.compile('<.*?>')\n",
    "        cleaned_text = re.sub(pattern, ' ', cleaned_text)\n",
    "        # 去掉一些特殊字符\n",
    "        cleaned_text = re.sub(\"\\]|\\\\|\\||\\\"|\\{|\\}|<|=|~|\\)|>|\\[|\\(|/\", ' ', cleaned_text)\n",
    "        cleaned_text = re.sub('\\n', \" \", cleaned_text)\n",
    "        all_char.update(re.findall(\"\\W\",cleaned_text))\n",
    "        return cleaned_text\n",
    "\n",
    "    def run(self, text):\n",
    "        \"\"\"\n",
    "        文本清洗\n",
    "        :param text: \n",
    "        \"\"\"\n",
    "        # 全角字符统一为半全角字符\n",
    "        text = self.convert_fullwidth_to_halfWidth(text.strip())\n",
    "        # 去除特殊字符\n",
    "        text = self.replace_special_char(text)\n",
    "        return text\n",
    "\n",
    "    def data_clear(self, row):\n",
    "        \"\"\"\n",
    "        清洗文本的工具函数\n",
    "        :param row: self.df 的每一行 \n",
    "        :return: 返回处理后的 self.df每一行 \n",
    "        \"\"\"\n",
    "        row['任职要求'] = self.run(row['任职要求'])\n",
    "        row['职位'] = self.run(row['职位'])\n",
    "        return row\n",
    "\n",
    "    def main_(self) -> DataFrame:\n",
    "        \"\"\"\n",
    "        执行函数\n",
    "        :return: 文本清洗后的 df \n",
    "        \"\"\"\n",
    "        self.df.apply(func=self.data_clear, axis=1)\n",
    "        return self.df"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2023-10-17T07:35:26.710673800Z",
     "start_time": "2023-10-17T07:35:26.676110600Z"
    }
   },
   "id": "6cea7e782d55beb4"
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "outputs": [],
   "source": [
    "all_char = set()"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2023-10-17T07:35:26.746295200Z",
     "start_time": "2023-10-17T07:35:26.705159900Z"
    }
   },
   "id": "ed9d36d8a77d941d"
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "当前网站: CareerBuilder\n",
      "当前网站: Simplyhired\n",
      "当前网站: linkedin\n",
      "当前网站: CIA\n",
      "当前网站: DNI\n",
      "当前网站: 智联招聘\n",
      "当前网站: boss直聘\n"
     ]
    },
    {
     "data": {
      "text/plain": "{'\\t',\n '\\r',\n ' ',\n '!',\n '#',\n '$',\n '%',\n '&',\n \"'\",\n '*',\n '+',\n ',',\n '-',\n '.',\n ':',\n ';',\n '?',\n '@',\n '\\\\',\n '^',\n '`',\n '|',\n '\\x9f',\n '\\xa0',\n '§',\n '«',\n '´',\n '·',\n '»',\n '\\u2002',\n '\\u200b',\n '‒',\n '–',\n '—',\n '‘',\n '’',\n '”',\n '•',\n '…',\n '‧',\n '\\u2028',\n '⃣',\n '→',\n '↓',\n '−',\n '∣',\n '∶',\n '≥',\n '≦',\n '■',\n '▪',\n '►',\n '◆',\n '◎',\n '●',\n '◼',\n '★',\n '☆',\n '♦',\n '✅',\n '✨',\n '❀',\n '❗',\n '➕',\n '⭐',\n '⻅',\n '⻔',\n '⻛',\n '⼒',\n '⼯',\n '⼰',\n '⼴',\n '⽂',\n '⽅',\n '⽣',\n '⽤',\n '⽬',\n '⾃',\n '⾊',\n '⾏',\n '⾦',\n '\\u3000',\n '、',\n '。',\n '《',\n '》',\n '「',\n '」',\n '『',\n '』',\n '【',\n '】',\n '\\uf06c',\n '\\uf09e',\n '\\uf09f',\n '\\uf0b7',\n '\\uf0d8',\n '\\uf0fc',\n '️',\n '\\ufeff',\n '＂',\n '�',\n '🈶',\n '🌈',\n '🌟',\n '🌸',\n '🌺',\n '🌻',\n '🍁',\n '🎁',\n '🎉',\n '🎖',\n '🎸',\n '🎻',\n '🏀',\n '🏁',\n '🏅',\n '🏠',\n '🐣',\n '👏',\n '👣',\n '💎',\n '💜',\n '💝',\n '💡',\n '💪',\n '🔸',\n '🔹',\n '😁',\n '😊',\n '😎',\n '😤',\n '🚁',\n '🤔',\n '🥇',\n '🧩'}"
     },
     "execution_count": 28,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "workbook = openpyxl.load_workbook('../Data/数据汇总.xlsx')\n",
    "\n",
    "for sheet_name in workbook.sheetnames:\n",
    "    item = dict()\n",
    "    print(f\"当前网站: {sheet_name}\")\n",
    "    df = pd.read_excel('../Data/数据汇总.xlsx', sheet_name=sheet_name, index_col=0)\n",
    "    if re.search('[\\u4e00-\\u9fa5]', sheet_name):  # 中文网站\n",
    "        df = ZhDataClear(data_frame=df).main_()\n",
    "        # break\n",
    "    else:  # 英文网站\n",
    "        # df = EnDataClear(data_frame=df).main_()\n",
    "        # item[\"Origin_name\"] = sheet_name\n",
    "        # item[\"df\"] = df\n",
    "        # EnDataPreprocessingAndBuildCorpus.save_dfs_to_excel(item)\n",
    "        pass\n",
    "all_char"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2023-10-17T07:35:30.715305Z",
     "start_time": "2023-10-17T07:35:26.718673700Z"
    }
   },
   "id": "1d55c2ea265464c2"
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "outputs": [
    {
     "ename": "PermissionError",
     "evalue": "[Errno 13] Permission denied: '智联招聘.csv'",
     "output_type": "error",
     "traceback": [
      "\u001B[1;31m---------------------------------------------------------------------------\u001B[0m",
      "\u001B[1;31mPermissionError\u001B[0m                           Traceback (most recent call last)",
      "Cell \u001B[1;32mIn[29], line 1\u001B[0m\n\u001B[1;32m----> 1\u001B[0m \u001B[43mdf\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mto_csv\u001B[49m\u001B[43m(\u001B[49m\u001B[38;5;124;43m\"\u001B[39;49m\u001B[38;5;124;43m智联招聘.csv\u001B[39;49m\u001B[38;5;124;43m\"\u001B[39;49m\u001B[43m)\u001B[49m\n",
      "File \u001B[1;32mD:\\tools_installed\\anaconda\\envs\\bishe\\lib\\site-packages\\pandas\\core\\generic.py:3902\u001B[0m, in \u001B[0;36mNDFrame.to_csv\u001B[1;34m(self, path_or_buf, sep, na_rep, float_format, columns, header, index, index_label, mode, encoding, compression, quoting, quotechar, lineterminator, chunksize, date_format, doublequote, escapechar, decimal, errors, storage_options)\u001B[0m\n\u001B[0;32m   3891\u001B[0m df \u001B[38;5;241m=\u001B[39m \u001B[38;5;28mself\u001B[39m \u001B[38;5;28;01mif\u001B[39;00m \u001B[38;5;28misinstance\u001B[39m(\u001B[38;5;28mself\u001B[39m, ABCDataFrame) \u001B[38;5;28;01melse\u001B[39;00m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mto_frame()\n\u001B[0;32m   3893\u001B[0m formatter \u001B[38;5;241m=\u001B[39m DataFrameFormatter(\n\u001B[0;32m   3894\u001B[0m     frame\u001B[38;5;241m=\u001B[39mdf,\n\u001B[0;32m   3895\u001B[0m     header\u001B[38;5;241m=\u001B[39mheader,\n\u001B[1;32m   (...)\u001B[0m\n\u001B[0;32m   3899\u001B[0m     decimal\u001B[38;5;241m=\u001B[39mdecimal,\n\u001B[0;32m   3900\u001B[0m )\n\u001B[1;32m-> 3902\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m \u001B[43mDataFrameRenderer\u001B[49m\u001B[43m(\u001B[49m\u001B[43mformatter\u001B[49m\u001B[43m)\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mto_csv\u001B[49m\u001B[43m(\u001B[49m\n\u001B[0;32m   3903\u001B[0m \u001B[43m    \u001B[49m\u001B[43mpath_or_buf\u001B[49m\u001B[43m,\u001B[49m\n\u001B[0;32m   3904\u001B[0m \u001B[43m    \u001B[49m\u001B[43mlineterminator\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43mlineterminator\u001B[49m\u001B[43m,\u001B[49m\n\u001B[0;32m   3905\u001B[0m \u001B[43m    \u001B[49m\u001B[43msep\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43msep\u001B[49m\u001B[43m,\u001B[49m\n\u001B[0;32m   3906\u001B[0m \u001B[43m    \u001B[49m\u001B[43mencoding\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43mencoding\u001B[49m\u001B[43m,\u001B[49m\n\u001B[0;32m   3907\u001B[0m \u001B[43m    \u001B[49m\u001B[43merrors\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43merrors\u001B[49m\u001B[43m,\u001B[49m\n\u001B[0;32m   3908\u001B[0m \u001B[43m    \u001B[49m\u001B[43mcompression\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43mcompression\u001B[49m\u001B[43m,\u001B[49m\n\u001B[0;32m   3909\u001B[0m \u001B[43m    \u001B[49m\u001B[43mquoting\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43mquoting\u001B[49m\u001B[43m,\u001B[49m\n\u001B[0;32m   3910\u001B[0m \u001B[43m    \u001B[49m\u001B[43mcolumns\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43mcolumns\u001B[49m\u001B[43m,\u001B[49m\n\u001B[0;32m   3911\u001B[0m \u001B[43m    \u001B[49m\u001B[43mindex_label\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43mindex_label\u001B[49m\u001B[43m,\u001B[49m\n\u001B[0;32m   3912\u001B[0m \u001B[43m    \u001B[49m\u001B[43mmode\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43mmode\u001B[49m\u001B[43m,\u001B[49m\n\u001B[0;32m   3913\u001B[0m \u001B[43m    \u001B[49m\u001B[43mchunksize\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43mchunksize\u001B[49m\u001B[43m,\u001B[49m\n\u001B[0;32m   3914\u001B[0m \u001B[43m    \u001B[49m\u001B[43mquotechar\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43mquotechar\u001B[49m\u001B[43m,\u001B[49m\n\u001B[0;32m   3915\u001B[0m \u001B[43m    \u001B[49m\u001B[43mdate_format\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43mdate_format\u001B[49m\u001B[43m,\u001B[49m\n\u001B[0;32m   3916\u001B[0m \u001B[43m    \u001B[49m\u001B[43mdoublequote\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43mdoublequote\u001B[49m\u001B[43m,\u001B[49m\n\u001B[0;32m   3917\u001B[0m \u001B[43m    \u001B[49m\u001B[43mescapechar\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43mescapechar\u001B[49m\u001B[43m,\u001B[49m\n\u001B[0;32m   3918\u001B[0m \u001B[43m    \u001B[49m\u001B[43mstorage_options\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43mstorage_options\u001B[49m\u001B[43m,\u001B[49m\n\u001B[0;32m   3919\u001B[0m \u001B[43m\u001B[49m\u001B[43m)\u001B[49m\n",
      "File \u001B[1;32mD:\\tools_installed\\anaconda\\envs\\bishe\\lib\\site-packages\\pandas\\io\\formats\\format.py:1152\u001B[0m, in \u001B[0;36mDataFrameRenderer.to_csv\u001B[1;34m(self, path_or_buf, encoding, sep, columns, index_label, mode, compression, quoting, quotechar, lineterminator, chunksize, date_format, doublequote, escapechar, errors, storage_options)\u001B[0m\n\u001B[0;32m   1131\u001B[0m     created_buffer \u001B[38;5;241m=\u001B[39m \u001B[38;5;28;01mFalse\u001B[39;00m\n\u001B[0;32m   1133\u001B[0m csv_formatter \u001B[38;5;241m=\u001B[39m CSVFormatter(\n\u001B[0;32m   1134\u001B[0m     path_or_buf\u001B[38;5;241m=\u001B[39mpath_or_buf,\n\u001B[0;32m   1135\u001B[0m     lineterminator\u001B[38;5;241m=\u001B[39mlineterminator,\n\u001B[1;32m   (...)\u001B[0m\n\u001B[0;32m   1150\u001B[0m     formatter\u001B[38;5;241m=\u001B[39m\u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mfmt,\n\u001B[0;32m   1151\u001B[0m )\n\u001B[1;32m-> 1152\u001B[0m \u001B[43mcsv_formatter\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43msave\u001B[49m\u001B[43m(\u001B[49m\u001B[43m)\u001B[49m\n\u001B[0;32m   1154\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m created_buffer:\n\u001B[0;32m   1155\u001B[0m     \u001B[38;5;28;01massert\u001B[39;00m \u001B[38;5;28misinstance\u001B[39m(path_or_buf, StringIO)\n",
      "File \u001B[1;32mD:\\tools_installed\\anaconda\\envs\\bishe\\lib\\site-packages\\pandas\\io\\formats\\csvs.py:247\u001B[0m, in \u001B[0;36mCSVFormatter.save\u001B[1;34m(self)\u001B[0m\n\u001B[0;32m    243\u001B[0m \u001B[38;5;250m\u001B[39m\u001B[38;5;124;03m\"\"\"\u001B[39;00m\n\u001B[0;32m    244\u001B[0m \u001B[38;5;124;03mCreate the writer & save.\u001B[39;00m\n\u001B[0;32m    245\u001B[0m \u001B[38;5;124;03m\"\"\"\u001B[39;00m\n\u001B[0;32m    246\u001B[0m \u001B[38;5;66;03m# apply compression and byte/text conversion\u001B[39;00m\n\u001B[1;32m--> 247\u001B[0m \u001B[38;5;28;01mwith\u001B[39;00m \u001B[43mget_handle\u001B[49m\u001B[43m(\u001B[49m\n\u001B[0;32m    248\u001B[0m \u001B[43m    \u001B[49m\u001B[38;5;28;43mself\u001B[39;49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mfilepath_or_buffer\u001B[49m\u001B[43m,\u001B[49m\n\u001B[0;32m    249\u001B[0m \u001B[43m    \u001B[49m\u001B[38;5;28;43mself\u001B[39;49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mmode\u001B[49m\u001B[43m,\u001B[49m\n\u001B[0;32m    250\u001B[0m \u001B[43m    \u001B[49m\u001B[43mencoding\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[38;5;28;43mself\u001B[39;49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mencoding\u001B[49m\u001B[43m,\u001B[49m\n\u001B[0;32m    251\u001B[0m \u001B[43m    \u001B[49m\u001B[43merrors\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[38;5;28;43mself\u001B[39;49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43merrors\u001B[49m\u001B[43m,\u001B[49m\n\u001B[0;32m    252\u001B[0m \u001B[43m    \u001B[49m\u001B[43mcompression\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[38;5;28;43mself\u001B[39;49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mcompression\u001B[49m\u001B[43m,\u001B[49m\n\u001B[0;32m    253\u001B[0m \u001B[43m    \u001B[49m\u001B[43mstorage_options\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[38;5;28;43mself\u001B[39;49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mstorage_options\u001B[49m\u001B[43m,\u001B[49m\n\u001B[0;32m    254\u001B[0m \u001B[43m\u001B[49m\u001B[43m)\u001B[49m \u001B[38;5;28;01mas\u001B[39;00m handles:\n\u001B[0;32m    255\u001B[0m     \u001B[38;5;66;03m# Note: self.encoding is irrelevant here\u001B[39;00m\n\u001B[0;32m    256\u001B[0m     \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mwriter \u001B[38;5;241m=\u001B[39m csvlib\u001B[38;5;241m.\u001B[39mwriter(\n\u001B[0;32m    257\u001B[0m         handles\u001B[38;5;241m.\u001B[39mhandle,\n\u001B[0;32m    258\u001B[0m         lineterminator\u001B[38;5;241m=\u001B[39m\u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mlineterminator,\n\u001B[1;32m   (...)\u001B[0m\n\u001B[0;32m    263\u001B[0m         quotechar\u001B[38;5;241m=\u001B[39m\u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mquotechar,\n\u001B[0;32m    264\u001B[0m     )\n\u001B[0;32m    266\u001B[0m     \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_save()\n",
      "File \u001B[1;32mD:\\tools_installed\\anaconda\\envs\\bishe\\lib\\site-packages\\pandas\\io\\common.py:863\u001B[0m, in \u001B[0;36mget_handle\u001B[1;34m(path_or_buf, mode, encoding, compression, memory_map, is_text, errors, storage_options)\u001B[0m\n\u001B[0;32m    858\u001B[0m \u001B[38;5;28;01melif\u001B[39;00m \u001B[38;5;28misinstance\u001B[39m(handle, \u001B[38;5;28mstr\u001B[39m):\n\u001B[0;32m    859\u001B[0m     \u001B[38;5;66;03m# Check whether the filename is to be opened in binary mode.\u001B[39;00m\n\u001B[0;32m    860\u001B[0m     \u001B[38;5;66;03m# Binary mode does not support 'encoding' and 'newline'.\u001B[39;00m\n\u001B[0;32m    861\u001B[0m     \u001B[38;5;28;01mif\u001B[39;00m ioargs\u001B[38;5;241m.\u001B[39mencoding \u001B[38;5;129;01mand\u001B[39;00m \u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mb\u001B[39m\u001B[38;5;124m\"\u001B[39m \u001B[38;5;129;01mnot\u001B[39;00m \u001B[38;5;129;01min\u001B[39;00m ioargs\u001B[38;5;241m.\u001B[39mmode:\n\u001B[0;32m    862\u001B[0m         \u001B[38;5;66;03m# Encoding\u001B[39;00m\n\u001B[1;32m--> 863\u001B[0m         handle \u001B[38;5;241m=\u001B[39m \u001B[38;5;28;43mopen\u001B[39;49m\u001B[43m(\u001B[49m\n\u001B[0;32m    864\u001B[0m \u001B[43m            \u001B[49m\u001B[43mhandle\u001B[49m\u001B[43m,\u001B[49m\n\u001B[0;32m    865\u001B[0m \u001B[43m            \u001B[49m\u001B[43mioargs\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mmode\u001B[49m\u001B[43m,\u001B[49m\n\u001B[0;32m    866\u001B[0m \u001B[43m            \u001B[49m\u001B[43mencoding\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43mioargs\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mencoding\u001B[49m\u001B[43m,\u001B[49m\n\u001B[0;32m    867\u001B[0m \u001B[43m            \u001B[49m\u001B[43merrors\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43merrors\u001B[49m\u001B[43m,\u001B[49m\n\u001B[0;32m    868\u001B[0m \u001B[43m            \u001B[49m\u001B[43mnewline\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[38;5;124;43m\"\u001B[39;49m\u001B[38;5;124;43m\"\u001B[39;49m\u001B[43m,\u001B[49m\n\u001B[0;32m    869\u001B[0m \u001B[43m        \u001B[49m\u001B[43m)\u001B[49m\n\u001B[0;32m    870\u001B[0m     \u001B[38;5;28;01melse\u001B[39;00m:\n\u001B[0;32m    871\u001B[0m         \u001B[38;5;66;03m# Binary mode\u001B[39;00m\n\u001B[0;32m    872\u001B[0m         handle \u001B[38;5;241m=\u001B[39m \u001B[38;5;28mopen\u001B[39m(handle, ioargs\u001B[38;5;241m.\u001B[39mmode)\n",
      "\u001B[1;31mPermissionError\u001B[0m: [Errno 13] Permission denied: '智联招聘.csv'"
     ]
    }
   ],
   "source": [
    "df.to_csv(\"智联招聘.csv\")"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2023-10-17T07:35:31.275289200Z",
     "start_time": "2023-10-17T07:35:30.703230800Z"
    }
   },
   "id": "162eca2731c7e8fa"
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
